1 Load Packages and Data

library(dplyr)
library(lubridate)
library(ggplot2)
library(ggeffects)
library(EloRating)
library(performance)
library(emmeans)

source("/Users/alba/Library/Mobile Documents/com~apple~CloudDocs/Postdoc_UNIL/Pooja_chapter_matrank/Analyses/dharma_diagnostic_fnct.R")

emigrants<-read.csv("/Users/alba/Library/Mobile Documents/com~apple~CloudDocs/Postdoc_UNIL/Pooja_chapter_matrank/Data/All_potential_immigrants.csv")

# load life history IVP, conflict data and group_presence matrices
load("/Users/alba/Library/Mobile Documents/com~apple~CloudDocs/Postdoc_UNIL/Pooja_chapter_matrank/Data/Immigrants_environment.RData")

2 Analysis: Does Tenure Predict Final Rank?

2.1 Prepare Data

# Analysis 1: Does tenure predict final rank?
library(lme4)
library(lmerTest)
library(dplyr)
library(glmmTMB)

# Filter emigrants with Elo rating based on all conflicts AND at least 5 conflicts
emigrants_with_elo_all <- emigrants[!is.na(emigrants$elo_end_tenure_all) & 
                                     emigrants$n_conflicts_end_tenure_all >= 5, ]
#111

# Filter emigrants with Elo rating based on male-male conflicts AND at least 5 male-male conflicts
emigrants_with_elo_male_male <- emigrants[!is.na(emigrants$elo_end_tenure_male_male) & 
                                           emigrants$n_conflicts_end_tenure_male_male >= 5, ]
#104 males

# Exclude tenures of 0
emigrants_with_elo_all <- emigrants_with_elo_all %>% filter(Tenure > 0)
emigrants_with_elo_male_male <- emigrants_with_elo_male_male %>% filter(Tenure > 0)

#some individuals has elo of 0 or 1 which needs to be transformed
# Transform Elo ratings for all conflicts dataset
emigrants_with_elo_all <- emigrants_with_elo_all %>%
  mutate(tr_elo_at_end_tenure_all = case_when(
    elo_end_tenure_all == 0 ~ 0.0001,
    elo_end_tenure_all == 1 ~ 0.9999,
    TRUE ~ elo_end_tenure_all
  ))

# Transform Elo ratings for male-male conflicts dataset
emigrants_with_elo_male_male <- emigrants_with_elo_male_male %>%
  mutate(tr_elo_at_end_tenure_male_male = case_when(
    elo_end_tenure_male_male == 0 ~ 0.0001,
    elo_end_tenure_male_male == 1 ~ 0.9999,
    TRUE ~ elo_end_tenure_male_male
  ))

2.2 Models: All Conflicts

#all conflicts
model15 <- glmmTMB(tr_elo_at_end_tenure_all ~ Tenure*ImmigrationGp1,
                      data = emigrants_with_elo_all, family = beta_family())

#interaction not significant but full-null model significant and important for model stability
check_lmer_assumptions_dharma(model15)
## === DHARMa MODEL ASSUMPTION CHECKS ===
## Model: Model

## 
## === STATISTICAL TESTS ===
## Uniformity Test (KS test):
## D = 0.1687 , p-value = 0.0057029 
## ** Model may have poor fit **
## 
## Outlier Test:
## p-value = 0.0095455 
## ** Potential outliers detected **
## 
## Dispersion Test:
## p-value = < 2.22e-16 
## ** Over/under-dispersion detected **
## 
## Zero-inflation Test:
## p-value = 1 
## No zero-inflation detected
#if i drop the interaction (not-significant, bad model fit)
model15_noint <- glmmTMB(tr_elo_at_end_tenure_all ~ Tenure+ImmigrationGp1,
                      data = emigrants_with_elo_all, family = beta_family())
check_lmer_assumptions_dharma(model15_noint)
## === DHARMa MODEL ASSUMPTION CHECKS ===
## Model: Model

## 
## === STATISTICAL TESTS ===
## Uniformity Test (KS test):
## D = 0.147 , p-value = 0.023337 
## ** Model may have poor fit **
## 
## Outlier Test:
## p-value = 0.0095455 
## ** Potential outliers detected **
## 
## Dispersion Test:
## p-value = < 2.22e-16 
## ** Over/under-dispersion detected **
## 
## Zero-inflation Test:
## p-value = 1 
## No zero-inflation detected
#so the interaction needs to be kept

#The lack of fit might be due to the problematic groups
#crossing and LT excluded (bad groups) and IF, which was rank-deficient
emigrants_with_elo_all_sub <- subset(emigrants_with_elo_all, !ImmigrationGp1 %in% c("LT", "CR", "IF"))

model15.sub <- glmmTMB(tr_elo_at_end_tenure_all ~ Tenure*ImmigrationGp1,
                      data = emigrants_with_elo_all_sub, family = beta_family())
check_lmer_assumptions_dharma(model15.sub)
## === DHARMa MODEL ASSUMPTION CHECKS ===
## Model: Model

## 
## === STATISTICAL TESTS ===
## Uniformity Test (KS test):
## D = 0.1079 , p-value = 0.3028 
## Model fit appears adequate
## 
## Outlier Test:
## p-value = 0.027247 
## ** Potential outliers detected **
## 
## Dispersion Test:
## p-value = 0.264 
## Dispersion appears appropriate
## 
## Zero-inflation Test:
## p-value = 1 
## No zero-inflation detected
model15_null.sub <- glmmTMB(tr_elo_at_end_tenure_all ~ 1,
                      data = emigrants_with_elo_all_sub, family = beta_family())
anova(model15.sub, model15_null.sub, test="Chisq")
## Data: emigrants_with_elo_all_sub
## Models:
## model15_null.sub: tr_elo_at_end_tenure_all ~ 1, zi=~0, disp=~1
## model15.sub: tr_elo_at_end_tenure_all ~ Tenure * ImmigrationGp1, zi=~0, disp=~1
##                  Df     AIC      BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## model15_null.sub  2   3.208   7.9973  0.3958   -0.792                         
## model15.sub       9 -34.524 -12.9738 26.2619  -52.524 51.732      7  6.593e-09
##                     
## model15_null.sub    
## model15.sub      ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#significant with all conflicts

drop1(model15.sub, test="Chisq")
## Single term deletions
## 
## Model:
## tr_elo_at_end_tenure_all ~ Tenure * ImmigrationGp1
##                       Df     AIC    LRT Pr(>Chi)
## <none>                   -34.524                
## Tenure:ImmigrationGp1  3 -38.751 1.7724    0.621
#again not significant interaction

summary(model15.sub)
##  Family: beta  ( logit )
## Formula:          tr_elo_at_end_tenure_all ~ Tenure * ImmigrationGp1
## Data: emigrants_with_elo_all_sub
## 
##      AIC      BIC   logLik deviance df.resid 
##    -34.5    -13.0     26.3    -52.5       72 
## 
## 
## Dispersion parameter for beta family (): 3.47 
## 
## Conditional model:
##                           Estimate Std. Error z value Pr(>|z|)  
## (Intercept)             -0.1784161  0.5801163  -0.308   0.7584  
## Tenure                   0.0004849  0.0008553   0.567   0.5708  
## ImmigrationGp1BD        -0.7131455  0.6505352  -1.096   0.2730  
## ImmigrationGp1KB        -0.0104987  0.9002971  -0.012   0.9907  
## ImmigrationGp1NH        -1.3442823  0.7215771  -1.863   0.0625 .
## Tenure:ImmigrationGp1BD  0.0006929  0.0009134   0.758   0.4481  
## Tenure:ImmigrationGp1KB  0.0012533  0.0010300   1.217   0.2237  
## Tenure:ImmigrationGp1NH  0.0009103  0.0010216   0.891   0.3729  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#check removing the interaction
model15.sub.noint <- glmmTMB(tr_elo_at_end_tenure_all ~ Tenure+ImmigrationGp1,
                      data = emigrants_with_elo_all_sub, family = beta_family())
check_lmer_assumptions_dharma(model15.sub.noint)
## === DHARMa MODEL ASSUMPTION CHECKS ===
## Model: Model

## 
## === STATISTICAL TESTS ===
## Uniformity Test (KS test):
## D = 0.1202 , p-value = 0.19239 
## Model fit appears adequate
## 
## Outlier Test:
## p-value = 0.13659 
## No significant outliers detected
## 
## Dispersion Test:
## p-value = 0.248 
## Dispersion appears appropriate
## 
## Zero-inflation Test:
## p-value = 1 
## No zero-inflation detected
#no outliers detected now and it is the best fit

AIC(model15.sub, model15, model15.sub.noint)
##                   df       AIC
## model15.sub        9 -34.52385
## model15           14 -26.22235
## model15.sub.noint  6 -38.75144
model15.sub.noint_null <- glmmTMB(tr_elo_at_end_tenure_all ~ 1,
                      data = emigrants_with_elo_all_sub, family = beta_family())
anova(model15.sub.noint,model15.sub.noint_null)
## Data: emigrants_with_elo_all_sub
## Models:
## model15.sub.noint_null: tr_elo_at_end_tenure_all ~ 1, zi=~0, disp=~1
## model15.sub.noint: tr_elo_at_end_tenure_all ~ Tenure + ImmigrationGp1, zi=~0, disp=~1
##                        Df     AIC      BIC  logLik deviance Chisq Chi Df
## model15.sub.noint_null  2   3.208   7.9973  0.3958   -0.792             
## model15.sub.noint       6 -38.751 -24.3847 25.3757  -50.751 49.96      4
##                        Pr(>Chisq)    
## model15.sub.noint_null               
## model15.sub.noint       3.681e-10 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1(model15.sub.noint, test="Chisq")
## Single term deletions
## 
## Model:
## tr_elo_at_end_tenure_all ~ Tenure + ImmigrationGp1
##                Df     AIC    LRT  Pr(>Chi)    
## <none>            -38.751                     
## Tenure          1  -9.991 30.760  2.92e-08 ***
## ImmigrationGp1  3 -28.198 16.554 0.0008729 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model15.sub.noint)
##  Family: beta  ( logit )
## Formula:          tr_elo_at_end_tenure_all ~ Tenure + ImmigrationGp1
## Data: emigrants_with_elo_all_sub
## 
##      AIC      BIC   logLik deviance df.resid 
##    -38.8    -24.4     25.4    -50.8       75 
## 
## 
## Dispersion parameter for beta family ():  3.4 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -0.6386684  0.3163422  -2.019   0.0435 *  
## Tenure            0.0012140  0.0002533   4.793 1.64e-06 ***
## ImmigrationGp1BD -0.2838135  0.3225563  -0.880   0.3789    
## ImmigrationGp1KB  0.9557886  0.4970271   1.923   0.0545 .  
## ImmigrationGp1NH -0.7503258  0.3454650  -2.172   0.0299 *  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1

2.3 Models: Male-Male Conflicts

## Repeat male-male conflicts
model15_mm <- glmmTMB(tr_elo_at_end_tenure_male_male ~ Tenure*ImmigrationGp1,
                      data = emigrants_with_elo_male_male, family = beta_family())
check_lmer_assumptions_dharma(model15_mm)
## === DHARMa MODEL ASSUMPTION CHECKS ===
## Model: Model

## 
## === STATISTICAL TESTS ===
## Uniformity Test (KS test):
## D = 0.1372 , p-value = 0.081604 
## Model fit appears adequate
## 
## Outlier Test:
## p-value = 1 
## No significant outliers detected
## 
## Dispersion Test:
## p-value = < 2.22e-16 
## ** Over/under-dispersion detected **
## 
## Zero-inflation Test:
## p-value = 1 
## No zero-inflation detected
#overdispersion

#subgroup exclusion
emigrants_with_elo_male_male_sub <- subset(emigrants_with_elo_male_male, !ImmigrationGp1 %in% c("LT", "CR"))

model15_mm <- glmmTMB(tr_elo_at_end_tenure_male_male ~ Tenure*ImmigrationGp1,
                      data = emigrants_with_elo_male_male_sub, family = beta_family())
check_lmer_assumptions_dharma(model15_mm)
## === DHARMa MODEL ASSUMPTION CHECKS ===
## Model: Model

## 
## === STATISTICAL TESTS ===
## Uniformity Test (KS test):
## D = 0.1647 , p-value = 0.042414 
## ** Model may have poor fit **
## 
## Outlier Test:
## p-value = 1 
## No significant outliers detected
## 
## Dispersion Test:
## p-value = < 2.22e-16 
## ** Over/under-dispersion detected **
## 
## Zero-inflation Test:
## p-value = 1 
## No zero-inflation detected
#still overdispersed with and without interaction and when including dispersion parameters

#check where is the problem
# Compare sample sizes and distributions
cat("All conflicts dataset: n =", nrow(emigrants_with_elo_all_sub), "\n")
## All conflicts dataset: n = 81
cat("Male-male dataset: n =", nrow(emigrants_with_elo_male_male), "\n")
## Male-male dataset: n = 85
# Compare Elo distributions
par(mfrow = c(1, 2))
hist(emigrants_with_elo_all_sub$tr_elo_at_end_tenure_all,
     main = "All Conflicts Elo", xlab = "Elo", breaks = 20)
hist(emigrants_with_elo_male_male$tr_elo_at_end_tenure_male_male,
     main = "Male-Male Elo", xlab = "Elo", breaks = 20)

par(mfrow = c(1, 1))

# Check for extreme values
summary(emigrants_with_elo_all_sub$tr_elo_at_end_tenure_all)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0001  0.3330  0.4640  0.4926  0.6290  0.9999
summary(emigrants_with_elo_male_male$tr_elo_at_end_tenure_male_male)
##    Min. 1st Qu.  Median    Mean 3rd Qu.    Max. 
##  0.0001  0.1550  0.4010  0.4587  0.6970  0.9999
# Check for 0s and 1s (even after transformation)
sum(emigrants_with_elo_male_male$tr_elo_at_end_tenure_male_male %in% c(0, 1, 0.0001, 0.9999))
## [1] 31

2.4 Separate Analysis: Moderate vs Top-Ranked Males

The male distribution has a huge peak close to 1 which is making the beta distribution angry. We are going to analyse these extremes separately but they really seem to be two strategies.

#the male distribution has a huge peak close to 1 which is making the beta distribution angry
#we are going to analyse these extremes separately but they really seem to be two strategies

# Separate extreme from non-extreme
emigrants_mm_moderate <- emigrants_with_elo_male_male %>%
  filter(tr_elo_at_end_tenure_male_male < 0.99)  # Exclude near-ceiling

emigrants_mm_extreme <- emigrants_with_elo_male_male %>%
  filter(tr_elo_at_end_tenure_male_male >= 0.99)  # Top-ranked males

cat("Moderate rank males:", nrow(emigrants_mm_moderate), "\n")
## Moderate rank males: 68
cat("Top-ranked males:", nrow(emigrants_mm_extreme), "\n")
## Top-ranked males: 17
# Analyze moderate ranks with beta regression
model15_mm_moderate <- glmmTMB(
  tr_elo_at_end_tenure_male_male ~ Tenure+ImmigrationGp1, 
  data = emigrants_mm_moderate, 
  family = beta_family())

#not significant interaction 
check_lmer_assumptions_dharma(model15_mm_moderate)
## === DHARMa MODEL ASSUMPTION CHECKS ===
## Model: Model

## 
## === STATISTICAL TESTS ===
## Uniformity Test (KS test):
## D = 0.1507 , p-value = 0.091098 
## Model fit appears adequate
## 
## Outlier Test:
## p-value = 1 
## No significant outliers detected
## 
## Dispersion Test:
## p-value = 0.12 
## Dispersion appears appropriate
## 
## Zero-inflation Test:
## p-value = 1 
## No zero-inflation detected
#happy

library(ggplot2)
ggplot(emigrants_mm_moderate, aes(x = Tenure, y = tr_elo_at_end_tenure_male_male)) +
  geom_point() +
  geom_smooth(method = "loess") +
  facet_wrap(~ImmigrationGp1)

# Test significance
model15_mm_moderate_null <- glmmTMB(
  tr_elo_at_end_tenure_male_male ~ 1, 
  data = emigrants_mm_moderate, 
  family = beta_family())

anova(model15_mm_moderate, model15_mm_moderate_null, test = "Chisq")
## Data: emigrants_mm_moderate
## Models:
## model15_mm_moderate_null: tr_elo_at_end_tenure_male_male ~ 1, zi=~0, disp=~1
## model15_mm_moderate: tr_elo_at_end_tenure_male_male ~ Tenure + ImmigrationGp1, zi=~0, disp=~1
##                          Df      AIC     BIC logLik deviance  Chisq Chi Df
## model15_mm_moderate_null  2  -97.736 -93.297 50.868  -101.74              
## model15_mm_moderate       8 -101.703 -83.947 58.851  -117.70 15.966      6
##                          Pr(>Chisq)  
## model15_mm_moderate_null             
## model15_mm_moderate         0.01394 *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1(model15_mm_moderate, test = "Chisq")
## Single term deletions
## 
## Model:
## tr_elo_at_end_tenure_male_male ~ Tenure + ImmigrationGp1
##                Df      AIC     LRT Pr(>Chi)   
## <none>            -101.703                    
## Tenure          1 -101.751  1.9519 0.162385   
## ImmigrationGp1  5  -96.328 15.3746 0.008876 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model15_mm_moderate)
##  Family: beta  ( logit )
## Formula:          tr_elo_at_end_tenure_male_male ~ Tenure + ImmigrationGp1
## Data: emigrants_mm_moderate
## 
##      AIC      BIC   logLik deviance df.resid 
##   -101.7    -83.9     58.9   -117.7       60 
## 
## 
## Dispersion parameter for beta family (): 1.64 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)    
## (Intercept)      -2.3680313  0.5901062  -4.013    6e-05 ***
## Tenure            0.0004632  0.0004002   1.158  0.24707    
## ImmigrationGp1BD  1.4587933  0.5001279   2.917  0.00354 ** 
## ImmigrationGp1CR -0.5799130  1.1504217  -0.504  0.61420    
## ImmigrationGp1KB  0.3672930  0.7964671   0.461  0.64469    
## ImmigrationGp1LT  0.3918694  0.6016989   0.651  0.51487    
## ImmigrationGp1NH  0.8552926  0.5432028   1.575  0.11536    
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#repeat model excluding less habituated groups
emigrants_mm_moderate_sub <- subset(emigrants_mm_moderate, ImmigrationGp1 != c("LT", "CR"))

model15_mm_moderate_sub <- glmmTMB(
  tr_elo_at_end_tenure_male_male ~ Tenure+ImmigrationGp1, 
  data = emigrants_mm_moderate_sub, 
  family = beta_family())

check_lmer_assumptions_dharma(model15_mm_moderate_sub)
## === DHARMa MODEL ASSUMPTION CHECKS ===
## Model: Model

## 
## === STATISTICAL TESTS ===
## Uniformity Test (KS test):
## D = 0.158 , p-value = 0.081895 
## Model fit appears adequate
## 
## Outlier Test:
## p-value = 1 
## No significant outliers detected
## 
## Dispersion Test:
## p-value = 0.064 
## Dispersion appears appropriate
## 
## Zero-inflation Test:
## p-value = 1 
## No zero-inflation detected
#happy

model15_mm_moderate_null_sub <- glmmTMB(
  tr_elo_at_end_tenure_male_male ~ 1, 
  data = emigrants_mm_moderate_sub, 
  family = beta_family())

anova(model15_mm_moderate_sub, model15_mm_moderate_null_sub, test = "Chisq")
## Data: emigrants_mm_moderate_sub
## Models:
## model15_mm_moderate_null_sub: tr_elo_at_end_tenure_male_male ~ 1, zi=~0, disp=~1
## model15_mm_moderate_sub: tr_elo_at_end_tenure_male_male ~ Tenure + ImmigrationGp1, zi=~0, disp=~1
##                              Df     AIC     BIC logLik deviance  Chisq Chi Df
## model15_mm_moderate_null_sub  2 -81.130 -76.812 42.565   -85.13              
## model15_mm_moderate_sub       8 -86.154 -68.883 51.077  -102.15 17.024      6
##                              Pr(>Chisq)   
## model15_mm_moderate_null_sub              
## model15_mm_moderate_sub        0.009195 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1(model15_mm_moderate_sub, test = "Chisq")
## Single term deletions
## 
## Model:
## tr_elo_at_end_tenure_male_male ~ Tenure + ImmigrationGp1
##                Df     AIC     LRT Pr(>Chi)   
## <none>            -86.154                    
## Tenure          1 -84.535  3.6188 0.057131 . 
## ImmigrationGp1  5 -80.865 15.2891 0.009196 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#pairwise comparisons
emm_groups <- emmeans(model15_mm_moderate_sub, ~ ImmigrationGp1)

# Pairwise comparisons between all groups
pairwise_comparisons <- pairs(emm_groups, adjust = "tukey")
summary(pairwise_comparisons)
##  contrast estimate    SE  df z.ratio p.value
##  AK - BD    -1.517 0.500 Inf  -3.037  0.0288
##  AK - CR     0.566 1.146 Inf   0.494  0.9964
##  AK - KB    -0.312 0.791 Inf  -0.395  0.9988
##  AK - LT    -0.801 0.735 Inf  -1.091  0.8854
##  AK - NH    -0.909 0.542 Inf  -1.678  0.5467
##  BD - CR     2.083 1.077 Inf   1.935  0.3807
##  BD - KB     1.205 0.700 Inf   1.721  0.5177
##  BD - LT     0.716 0.610 Inf   1.174  0.8494
##  BD - NH     0.609 0.348 Inf   1.748  0.5000
##  CR - KB    -0.878 1.245 Inf  -0.706  0.9813
##  CR - LT    -1.367 1.203 Inf  -1.137  0.8661
##  CR - NH    -1.475 1.096 Inf  -1.346  0.7594
##  KB - LT    -0.489 0.889 Inf  -0.550  0.9940
##  KB - NH    -0.596 0.736 Inf  -0.810  0.9657
##  LT - NH    -0.107 0.644 Inf  -0.167  1.0000
## 
## Results are given on the log odds ratio (not the response) scale. 
## P value adjustment: tukey method for comparing a family of 6 estimates
#AK-BD differ

Males immigrating to BD achieve significantly higher competitive rankings (Elo) at the end of their tenure compared to males immigrating to AK. All other groups are statistically similar to each other.

2.5 Top-Ranked Males: Does Tenure Predict Reaching Top Rank?

# Logistic regression: top rank (yes/no) ~ tenure
emigrants_with_elo_male_male$top_rank <- 
  emigrants_with_elo_male_male$tr_elo_at_end_tenure_male_male >= 0.99

# Logistic model
model_top_rank <- glmmTMB(
  top_rank ~ Tenure+ImmigrationGp1, 
  data = emigrants_with_elo_male_male, 
  family = binomial())

model_top_rank_null <- glmmTMB(
  top_rank ~ 1, 
  data = emigrants_with_elo_male_male, 
  family = binomial())

check_lmer_assumptions_dharma(model_top_rank)
## === DHARMa MODEL ASSUMPTION CHECKS ===
## Model: Model

## 
## === STATISTICAL TESTS ===
## Uniformity Test (KS test):
## D = 0.1186 , p-value = 0.16882 
## Model fit appears adequate
## 
## Outlier Test:
## p-value = 1 
## No significant outliers detected
## 
## Dispersion Test:
## p-value = 0.856 
## Dispersion appears appropriate
## 
## Zero-inflation Test:
## p-value = 1 
## No zero-inflation detected
anova(model_top_rank, model_top_rank_null)
## Data: emigrants_with_elo_male_male
## Models:
## model_top_rank_null: top_rank ~ 1, zi=~0, disp=~1
## model_top_rank: top_rank ~ Tenure + ImmigrationGp1, zi=~0, disp=~1
##                     Df    AIC     BIC  logLik deviance  Chisq Chi Df Pr(>Chisq)
## model_top_rank_null  1 87.068  89.511 -42.534   85.068                         
## model_top_rank       7 85.434 102.532 -35.717   71.434 13.634      6      0.034
##                      
## model_top_rank_null  
## model_top_rank      *
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model_top_rank)
##  Family: binomial  ( logit )
## Formula:          top_rank ~ Tenure + ImmigrationGp1
## Data: emigrants_with_elo_male_male
## 
##      AIC      BIC   logLik deviance df.resid 
##     85.4    102.5    -35.7     71.4       78 
## 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)      -1.283e+00  8.107e-01  -1.582   0.1136  
## Tenure            1.189e-03  6.501e-04   1.829   0.0674 .
## ImmigrationGp1BD -2.026e+00  8.853e-01  -2.289   0.0221 *
## ImmigrationGp1CR  5.286e-01  1.498e+00   0.353   0.7242  
## ImmigrationGp1KB -2.103e+01  1.615e+04  -0.001   0.9990  
## ImmigrationGp1LT -5.860e-01  9.555e-01  -0.613   0.5397  
## ImmigrationGp1NH -1.018e+00  8.838e-01  -1.152   0.2492  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1(model_top_rank, test = "Chisq")
## Single term deletions
## 
## Model:
## top_rank ~ Tenure + ImmigrationGp1
##                Df    AIC     LRT Pr(>Chi)  
## <none>            85.434                   
## Tenure          1 87.520  4.0863  0.04323 *
## ImmigrationGp1  5 85.633 10.1995  0.06977 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
#repeat with most habituated groups
emigrants_with_elo_male_male_sub$top_rank <- 
  emigrants_with_elo_male_male_sub$tr_elo_at_end_tenure_male_male >= 0.99

model_top_rank <- glmmTMB(
  top_rank ~ Tenure+ImmigrationGp1, 
  data = emigrants_with_elo_male_male_sub, 
  family = binomial())

model_top_rank_null <- glmmTMB(
  top_rank ~ 1, 
  data = emigrants_with_elo_male_male_sub, 
  family = binomial())

check_lmer_assumptions_dharma(model_top_rank)
## === DHARMa MODEL ASSUMPTION CHECKS ===
## Model: Model

## 
## === STATISTICAL TESTS ===
## Uniformity Test (KS test):
## D = 0.1186 , p-value = 0.2505 
## Model fit appears adequate
## 
## Outlier Test:
## p-value = 1 
## No significant outliers detected
## 
## Dispersion Test:
## p-value = 0.848 
## Dispersion appears appropriate
## 
## Zero-inflation Test:
## p-value = 1 
## No zero-inflation detected
anova(model_top_rank, model_top_rank_null)
## Data: emigrants_with_elo_male_male_sub
## Models:
## model_top_rank_null: top_rank ~ 1, zi=~0, disp=~1
## model_top_rank: top_rank ~ Tenure + ImmigrationGp1, zi=~0, disp=~1
##                     Df    AIC    BIC  logLik deviance Chisq Chi Df Pr(>Chisq)  
## model_top_rank_null  1 66.513 68.776 -32.257   64.513                          
## model_top_rank       5 66.417 77.731 -28.209   56.417 8.096      4    0.08812 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
summary(model_top_rank)
##  Family: binomial  ( logit )
## Formula:          top_rank ~ Tenure + ImmigrationGp1
## Data: emigrants_with_elo_male_male_sub
## 
##      AIC      BIC   logLik deviance df.resid 
##     66.4     77.7    -28.2     56.4       66 
## 
## 
## Conditional model:
##                    Estimate Std. Error z value Pr(>|z|)  
## (Intercept)      -1.166e+00  8.452e-01  -1.380   0.1677  
## Tenure            1.033e-03  7.387e-04   1.398   0.1621  
## ImmigrationGp1BD -1.975e+00  8.836e-01  -2.235   0.0254 *
## ImmigrationGp1KB -2.094e+01  1.676e+04  -0.001   0.9990  
## ImmigrationGp1NH -9.987e-01  8.781e-01  -1.137   0.2554  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
drop1(model_top_rank, test = "Chisq")
## Single term deletions
## 
## Model:
## top_rank ~ Tenure + ImmigrationGp1
##                Df    AIC    LRT Pr(>Chi)  
## <none>            66.417                  
## Tenure          1 66.810 2.3928  0.12190  
## ImmigrationGp1  3 67.338 6.9205  0.07447 .
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
# Interpretation: Does longer tenure increase probability of reaching top rank? nop

2.6 Define Colors

all_groups <- c("AK", "BD", "CR", "IF", "KB", "LT", "NH")
group_colors <- c(
  "AK" = "#00CC99",
  "BD" = "#1F78B4", 
  "CR" = "#7570B3",
  "IF" = "#660000",
  "KB" = "#336633",
  "LT" = "#E6AB02",
  "NH" = "#FF6600")

3 Visualizations

library(ggeffects)
library(ggplot2)
library(patchwork)

# Custom colors
all_groups <- c("AK", "BD", "CR", "IF", "KB", "LT", "NH")
group_colors <- c(
  "AK" = "#00CC99",
  "BD" = "#1F78B4", 
  "CR" = "#7570B3",
  "IF" = "#660000",
  "KB" = "#336633",
  "LT" = "#E6AB02",
  "NH" = "#FF6600")

# === PLOT 1: All Conflicts (with group sample sizes) ===
pred_all <- ggpredict(model15.sub.noint, terms = "Tenure [all]")


# Add group sample sizes for Plot 1
group_counts_all <- emigrants_with_elo_all_sub %>%
  group_by(ImmigrationGp1) %>%
  summarise(n = n()) %>%
  mutate(label = paste0(ImmigrationGp1, " (n=", n, ")"))

group_labels_all <- setNames(group_counts_all$label, group_counts_all$ImmigrationGp1)

plot1 <- ggplot() +
  geom_ribbon(data = as.data.frame(pred_all),
              aes(x = x, ymin = conf.low, ymax = conf.high),
              alpha = 0.2, fill = "grey50") +
  geom_line(data = as.data.frame(pred_all),
            aes(x = x, y = predicted),
            size = 1.5, color = "black") +
  geom_point(data = emigrants_with_elo_all_sub,
             aes(x = Tenure, y = tr_elo_at_end_tenure_all, color = ImmigrationGp1),
             size = 3, alpha = 0.7) +
  annotate("text", x = -Inf, y = Inf, label = "A",
           hjust = -0.5, vjust = 1.5, size = 6, fontface = "bold") +
  scale_color_manual(values = group_colors, labels = group_labels_all) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  labs(x = "Tenure (Days)",
       y = "Elo Rating at End of Tenure\n(All Conflicts)",
       color = NULL) +
  theme_classic(base_size = 14) +
  theme(legend.position = "top",
        legend.direction = "horizontal",
        legend.spacing.y = unit(-0.1, "cm"))

# === PLOT 2: Male-Male Moderate Ranks ===
pred_mm_moderate <- ggpredict(model15_mm_moderate, terms = c("Tenure [all]", "ImmigrationGp1"))

group_counts_mm <- emigrants_mm_moderate %>%
  group_by(ImmigrationGp1) %>%
  summarise(n = n()) %>%
  mutate(label = paste0(ImmigrationGp1, " (n=", n, ")"))

group_labels_mm <- setNames(group_counts_mm$label, group_counts_mm$ImmigrationGp1)

plot2 <- ggplot() +
  geom_ribbon(data = as.data.frame(pred_mm_moderate),
              aes(x = x, ymin = conf.low, ymax = conf.high, fill = group),
              alpha = 0.1, color = NA) +
  geom_line(data = as.data.frame(pred_mm_moderate),
            aes(x = x, y = predicted, color = group),
            size = 1.5) +
  geom_point(data = emigrants_mm_moderate,
             aes(x = Tenure, y = tr_elo_at_end_tenure_male_male, color = ImmigrationGp1),
             size = 3, alpha = 0.7) +
  annotate("text", x = -Inf, y = Inf, label = "B",
           hjust = -0.5, vjust = 1.5, size = 6, fontface = "bold") +
  scale_color_manual(values = group_colors, labels = group_labels_mm) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE)) +
  scale_fill_manual(values = group_colors, guide = "none") +
  labs(x = "Tenure (Days)",
       y = "Elo Rating at End of Tenure\n(Male-Male, Elo < 0.99)",
       color = NULL,
       fill = NULL) +
  theme_classic(base_size = 14) +
  theme(legend.position = "top",
        legend.direction = "horizontal",
        legend.spacing.y = unit(-0.1, "cm"))

# === PLOT 3: Top Rank Probability (shortened "t" instead of "top") ===
# Convert top_rank to numeric (0/1) instead of logical (TRUE/FALSE)
emigrants_with_elo_male_male$top_rank <- 
  as.numeric(emigrants_with_elo_male_male$tr_elo_at_end_tenure_male_male >= 0.99)

# Now fit the model with numeric top_rank
model_top_rank <- glmmTMB(
  top_rank ~ Tenure + ImmigrationGp1, 
  data = emigrants_with_elo_male_male, 
  family = binomial())

pred_top_rank <- ggpredict(model_top_rank, terms = c("Tenure [all]", "ImmigrationGp1"))

group_counts_top <- emigrants_with_elo_male_male %>%
  group_by(ImmigrationGp1) %>%
  summarise(
    n = n(),
    n_top = sum(as.numeric(top_rank))  # Convert factor to numeric, subtract 1
  ) %>%
  mutate(label = paste0(ImmigrationGp1, " (n=", n, ", t=", n_top, ")"))

group_labels_top <- setNames(group_counts_top$label, group_counts_top$ImmigrationGp1)

plot3 <- ggplot() +
  geom_ribbon(data = as.data.frame(pred_top_rank),
              aes(x = x, ymin = conf.low, ymax = conf.high, fill = group),
              alpha = 0.1, color = NA) +
  geom_line(data = as.data.frame(pred_top_rank),
            aes(x = x, y = predicted, color = group),
            size = 1.5) +
  geom_point(data = emigrants_with_elo_male_male,
             aes(x = Tenure, y = as.numeric(top_rank), color = ImmigrationGp1),
             size = 3, alpha = 0.7,
             position = position_jitter(height = 0.02, width = 0)) +
  annotate("text", x = -Inf, y = Inf, label = "C",
           hjust = -0.5, vjust = 1.5, size = 6, fontface = "bold") +
  scale_color_manual(values = group_colors, labels = group_labels_top) +
  scale_fill_manual(values = group_colors, labels = group_labels_top) +
  guides(color = guide_legend(nrow = 3, byrow = TRUE), fill = "none") +
  scale_y_continuous(breaks = c(0, 1), labels = c("No", "Yes")) +
  labs(x = "Tenure (Days)",
       y = "Reached Top Rank\n(Elo ≥ 0.99)",
       color = NULL,
       fill = NULL) +
  theme_classic(base_size = 14) +
  theme(legend.position = "top",
        legend.direction = "horizontal",
        legend.spacing.y = unit(-0.1, "cm"))

# === COMBINE PLOTS in 1 row x 3 columns ===
combined_plot <- plot1 | plot2 | plot3
print(combined_plot)

4 Characteristics of Top-Ranked Males

Note: This chunk can only be run after script six (6. Analyses_socbehav_matrank) has been run.

library(dplyr)
library(knitr)
library(kableExtra)

source("/Users/alba/Library/Mobile Documents/com~apple~CloudDocs/Postdoc_UNIL/Pooja_chapter_matrank/Analyses/sex_ratio_calculator_fnct.R")

# Extract the top-ranked males from each dataset 
top_ranked_males <- emigrants_mm_extreme$Code

emigrants_top_ranked <- emigrants %>%
  filter(Code %in% top_ranked_males)

network_grooming_top_ranked <- xdata_groom_all %>%
  filter(immigrant_code %in% top_ranked_males)

network_proximity_top_ranked <- xdata_scan_sub %>%
  filter(immigrant_code %in% top_ranked_males)

# Combine all information
top_ranked_table <- emigrants_top_ranked %>%
  left_join(network_grooming_top_ranked, 
            by = c("Code"="immigrant_code"),
            suffix = c("", "_groom")) %>%
  left_join(network_proximity_top_ranked, 
            by = c("Code"="immigrant_code"),
            suffix = c("", "_prox"))

#calculate sex ratio on dispersing groups
top_ranked_table <- calculate_sex_ratio_at_immigration(
  emigrants = top_ranked_table,
  life_hist = life_hist,
  group_presence_matrices = group_presence_matrices)

# Select relevant columns
top_ranked_summary <- top_ranked_table %>%
  select(Code, ImmigrationGp1, Tenure, BirthGp,
         grooming_rate, eigenvector_centrality, total_degree_normalized, degree_normalized, strength_assoc, eigenvector_centrality_prox = eigenvector_centrality_prox, DateImmigration1, sex_ratio_mf, prop_male)  # from proximity

# Create a nice table
kable(top_ranked_summary,
      caption = "Top-Ranked Males: Demographics and Network Metrics",
      digits = 2,
      col.names = c("Name", "Group", "Tenure (days)", "Birth Group",
                    "Grooming Rate", "Eigen. Centrality (Groom)", "Groom degree", "Prox. degree", "Association Strength", "Eigen. Centrality (Prox)", "Immigration date", "Sex ratio ImmGp", "Proportion males")) %>%
  kable_styling(bootstrap_options = c("striped", "hover", "condensed"),
                full_width = FALSE)

library(flextable)
ft <- flextable(top_ranked_summary) %>%
  set_caption("Top-Ranked Males: Demographics and Network Metrics") %>%
  colformat_double(digits = 2) %>%
  autofit()

summary(top_ranked_table$sex_ratio_mf)

#compare to non-top ranking
emigrants <- calculate_sex_ratio_at_immigration(
  emigrants = emigrants,
  life_hist = life_hist,
  group_presence_matrices = group_presence_matrices)

non_top_ranking_males <- emigrants %>%
  filter(!Code %in% top_ranked_table$Code) %>%
  left_join(xdata_groom_all %>% 
              select(Code = immigrant_code, grooming_rate, eigenvector_centrality, total_degree_normalized),
            by = "Code") %>%
  left_join(xdata_scan_sub %>% 
            select(Code = immigrant_code, strength_assoc, eigenvector_centrality_prox = eigenvector_centrality, degree_normalized),
            by = "Code")

# Compare sex ratios
cat("=== TOP-RANKING MALES (Elo >= 0.99) ===\n")
cat("N =", nrow(top_ranked_table), "\n")
summary(top_ranked_table[, c("total_group_size", "sex_ratio_mf", "prop_male", "prop_unknown_sex")])

cat("\n=== NON-TOP-RANKING MALES (Elo < 0.99) ===\n")
cat("N =", nrow(non_top_ranking_males), "\n")
summary(non_top_ranking_males[, c("total_group_size", "sex_ratio_mf", "prop_male", "prop_unknown_sex")])

# t-test for sex ratio
t_test_ratio <- t.test(top_ranked_table$sex_ratio_mf, 
                       non_top_ranking_males$sex_ratio_mf,
                      na.action = na.omit)
print(t_test_ratio)
#not significantly different

# Define variables to test
test_vars <- c(
  sex_ratio_mf = "Sex Ratio (M/F)",
  prop_male = "Proportion Male",
  total_group_size = "Group Size at Immigration",
  Tenure = "Tenure",
  grooming_rate = "Grooming Rate",
  eigenvector_centrality = "Eigenvector Centrality (Grooming)",
  total_degree_normalized = "Total Degree Normalized",
  strength_assoc = "Association Strength (Proximity)",
  eigenvector_centrality_prox = "Eigenvector Centrality (Proximity)",
  degree_normalized = "Degree Normalized (Proximity)")

results_list <- list()
effect_sizes <- list()

library(effsize)
for (var in names(test_vars)) {
  
  # t-test
  t_result <- t.test(top_ranked_table[[var]], 
                     non_top_ranking_males[[var]],
                    var.equal = FALSE,#this is the Welch correction to control for sample size imbalance and potential differences in
                     na.action = na.omit)
  
  # Effect size
  cohen <- cohen.d(top_ranked_table[[var]], 
                   non_top_ranking_males[[var]],
                   na.rm = TRUE)

  # Store results
  results_list[[var]] <- list(
    variable = test_vars[var],
    top_mean = mean(top_ranked_table[[var]], na.rm = TRUE),
    top_sd = sd(top_ranked_table[[var]], na.rm = TRUE),
    non_top_mean = mean(non_top_ranking_males[[var]], na.rm = TRUE),
    non_top_sd = sd(non_top_ranking_males[[var]], na.rm = TRUE),
    t_statistic = t_result$statistic,
    df = t_result$parameter,
    p_value = t_result$p.value,
    cohen_d = cohen$estimate,
    magnitude = cohen$magnitude
  )
}

# Print effect sizes summary
cat("\n=== EFFECT SIZES (Cohen's d) ===\n")
for (var in names(results_list)) {
  cat(sprintf("%s: d = %.3f (%s), t(%.1f) = %.3f, p = %.4f\n", 
              results_list[[var]]$variable,
              results_list[[var]]$cohen_d,
              results_list[[var]]$magnitude,
              results_list[[var]]$df,
              results_list[[var]]$t_statistic,
              results_list[[var]]$p_value))
}

results_table <- do.call(rbind, lapply(names(results_list), function(var) {
  data.frame(
    Variable = results_list[[var]]$variable,
    Cohen_d = results_list[[var]]$cohen_d,
    Magnitude = results_list[[var]]$magnitude,
    df = results_list[[var]]$df,
    t_statistic = results_list[[var]]$t_statistic,
    p_value = results_list[[var]]$p_value
  )
}))

save_as_docx(ft, path = "/Users/alba/Library/Mobile Documents/com~apple~CloudDocs/Postdoc_UNIL/Pooja_chapter_matrank/Data/top_ranked_males_table.docx")

write.csv(results_table, "/Users/alba/Library/Mobile Documents/com~apple~CloudDocs/Postdoc_UNIL/Pooja_chapter_matrank/Data/top_nontop_effect_sizes_table.csv", row.names = FALSE)

Analysis completed: 2026-01-22